
## 2004 Analysis

setwd("C:/Users/Nathen Huang/Dropbox/QMSS Thesis/Martha's Vineyard/Data/Ready for Analysis")
mv04 <- read.csv("2004 Martha's Vineyard - Cleaned.csv")
mv04$year <- rep(2004)

table(mv04$oppolexp)
library(arm)

which(colnames(mv04) == "lgencor1")
which(colnames(mv04) == "lpres9")

mv04sub <- mv04[,-c(5,6)]
mv04sub <- mv04[,1:9]
colnames(mv04sub)
library(psy)
cronbach(mv04sub)

mv04$legal <- rowMeans(mv04[])

install.packages('ltm')
library(ltm)

alpha(mv04sub, na.rm = TRUE)


## Identifies the number of the column

which(colnames(mv04)=="white")
which(colnames(mv04)=="dgrndpar")

demographics <- colnames(mv04[,53:71]) ## Using demographics section columns

## Opinions about the American Legal System

mv04$lgencor1 <- as.numeric(mv04$lgencor1)
mv04$lsupcor2 <- as.numeric(mv04$lsupcor2)
mv04$lpolice3 <- as.numeric(mv04$lpolice3)
mv04$llocpol4 <- as.numeric(mv04$llocpol4)
mv04$lcong7 <- as.numeric(mv04$lcong7)
mv04$llocgov8 <- as.numeric(mv04$llocgov8)
mv04$lpres9 <- as.numeric(mv04$lpres9)

## Schooling
mv04$lpubsch5 <- as.numeric(mv04$lpubsch5)
mv04$llocsch6 <- as.numeric(mv04$llocsch6)

# Breaking up into usable factors

## Factor Analysis for OLS 

library(psych)
library(zoo) ## na.approx package
library(stats) ## factanal package in R

dependents <- c("lgencor1", "lsupcor2", "lpolice3", "llocpol4", "lpubsch5", "llocsch6", "lcong7", "llocgov8", "lpres9")
outcomes04 <- blacks.04[,dependents]
outcomes04 <- na.spline(outcomes) ## FIGURE OUT HOW TO INTERPOLATE DATA LATER
outcomes <- na.omit(mv04[,dependents])

outcomes_fa <- factanal(outcomes, factors = 5, rotation = "varimax")

outcomes_fa

outcomes_fa$scores

# Consolidating the 8 variables of racial rejection sensitivity measure
rrs <- c("Subject", "parestcn", "parestex", "papolcn", "papolex", "pashopcn", "pashopex", "paatmcn" , "paatmex")

## Racial Rejection Sensitivity 

## Multiplying concern with expectation 

mv04$rrs1 <- as.numeric(mv04$parestcn * mv04$parestex) 
mv04$rrs2 <- as.numeric(mv04$papolcn * mv04$papolex)
mv04$rrs3 <- as.numeric(mv04$pashopcn * mv04$pashopex)
mv04$rrs4 <- as.numeric(mv04$paatmcn * mv04$paatmex)

rrs <- c("rrs1", "rrs2", "rrs3", "rrs4")
mv04$mean.rrs <- rowMeans(mv04[,rrs])

## Opportunities in society
mv04$opeq <- 5 - as.numeric(mv04$opeq) ## Higher value, strongly agree

mv04$opdscem <- as.numeric(mv04$opdscem)
mv04$opdscac <- as.numeric(mv04$opdscac)
mv04$ophouse <- as.numeric(mv04$ophouse)
mv04$opshop <- as.numeric(mv04$ophouse)
mv04$oppolice <- as.numeric(mv04$ophouse)

## Diversity Work and Politics 

library(QMSS)
mv04$dwpsoc <- ReverseThis(mv04$dwpsoc) ## Higher values, more exposure to diversity
mv04$dwpwork <- ReverseThis(mv04$dwpwork) 
mv04$dwpdiv <- ReverseThis(mv04$dwpdiv) 

mv04$dwppol <- ReverseThis(mv04$dwppol)
mv04$dwppoldv <- ReverseThis(mv04$dwppoldv)
mv04$dwprace <- ReverseThis(mv04$dwprace)
mv04$dwpimprs <- ReverseThis(mv04$dwpimprs)

## Variables - Demographics, col 53:74
mv04$subject.id <- NULL
mv04$date <- NULL
mv04$bgfips <- NULL

mv04$RA <- NULL 
mv04$RA2 <- NULL	
mv04$location	<- NULL
mv04$Survey <- NULL
mv04$demrace <- NULL   
mv04$white <-	as.numeric(mv04$white)
mv04$blackam <-	as.numeric(mv04$blackam)
mv04$blaknoam	<- as.numeric(mv04$blaknoam)	
mv04$demskin <- as.numeric(mv04$demskin)	
mv04$demage <- as.numeric(mv04$demage)
mv04$demgend <- as.factor(mv04$demgend)
mv04$demmarg <-	NULL
mv04$demcont <-	NULL
mv04$demcit.7	<- NULL 
mv04$dembirth <- NULL	
mv04$demclass <-	6 - as.numeric(mv04$demclass) ## Lower number, lower class
mv04$dempolt <-	as.factor(mv04$dempolt)
mv04$dlibcnsv <- as.factor(mv04$dlibcnsv)	
mv04$dedu.10 <-	as.numeric(mv04$dedu.10)
mv04$dmomedu <-	as.numeric(mv04$dmomedu)
mv04$ddadedu <-	as.numeric(mv04$ddadedu)
mv04$dincome <-	as.numeric(mv04$dincome)
mv04$dstay <-	NULL
mv04$dlocate <-	NULL
mv04$dyears <- as.numeric(mv04$dyears)	
mv04$dparvist <- as.numeric(mv04$dparvist) 
mv04$drentown <- as.factor(mv04$drentown)
mv04$dgrndpar <- as.factor(mv04$dgrndpar)
mv04$census <- NULL
mv04$red.blue	<- NULL
mv04$cenprob <-	NULL
mv04$zipcode <- NULL
mv04$RARace	<- NULL
mv04$RAskin <- as.numeric(mv04$RAskin)

## Race Categorization 

## Highly specific categorization of individuals as white 
mv04$white.check <- ifelse(mv04$white == 1 & (mv04$blackam != 1 & mv04$blaknoam != 1 & mv04$latino != 1 & mv04$asian != 1 & mv04$native	!= 1 & mv04$pacisl != 1	& mv04$other != 1), 1, 0)
table(mv04$white.check)

## Highly specific categorization of individuals as black

mv04$black.check <- ifelse((mv04$blackam == 1 | mv04$blaknoam == 1) & (mv04$white != 1 & mv04$latino != 1 & mv04$asian != 1 & mv04$native  != 1 & mv04$pacisl != 1 & mv04$other != 1), 1, 0) 
table(mv04$black.check)

mv04$race.check <- ifelse(mv04$white.check == 1, 1, ifelse(mv04$black.check == 1, 2, 3))
mv04$race.check <- as.factor(mv04$race.check)

table(mv04$race.check)


# Determining one's class using income brackets

table(mv04$demclass) ## Only 5 people felt that they were "upper class" 
table(mv04$dincome) ## Yet, there's at LEAST 39 people who make > $400k 

mv04$dincome <- as.numeric(mv04$dincome)
mv04$dempolt <- as.factor(mv04$dempolt) ## coercing political orientation into number

mv04$poor <- ifelse(mv04$dincome < 4, 1, 0)
mv04$rich <- ifelse(mv04$dincome > 6, 1, 0)

poor.04 <- filter(mv04, poor == 1)
rich.04 <- filter(mv04, rich == 1)

table(mv04$poor)
table(mv04$rich)

library(dplyr)


## Plots

ggplot(na.omit(mv04[, c("type", "lgencor1")]), 
       aes(x=type, y=lgencor1, fill = type)) +  
  geom_bar() +
  scale_fill_manual(values = c("black","bisque","black","bisque")) +
  stat_summary(fun.y="mean", geom="bar") +
  ggtitle("Favorability of General Court System, 2004") +
  theme_bw()

ggplot(na.omit(mv04[, c("type", "lsupcor2")]), 
       aes(x=type, y=lsupcor2, fill = type)) +  
  geom_bar() +
  scale_fill_manual(values = c("black","bisque","black","bisque")) +
  stat_summary(fun.y="mean", geom="bar") +
  ggtitle("Favorability of SUpreme Court, 2004") +
  theme_bw()

ggplot(na.omit(mv04[, c("type", "lpolice3")]), 
       aes(x=type, y=lpolice3, fill = type)) +  
  geom_bar() +
  scale_fill_manual(values = c("black","bisque","black","bisque")) +
  stat_summary(fun.y="mean", geom="bar") +
  ggtitle("Favorability of National Police, 2004") +
  theme_bw()

ggplot(na.omit(mv04[, c("type", "llocpol4")]), 
       aes(x=type, y=llocpol4, fill = type)) +  
  geom_bar() +
  scale_fill_manual(values = c("black","bisque","black","bisque")) +
  stat_summary(fun.y="mean", geom="bar") +
  ggtitle("Favorability of Local Police, 2004") +
  theme_bw()

ggplot(na.omit(mv04[, c("type", "lcong7")]), 
       aes(x=type, y=lcong7, fill = type)) +  
  geom_bar() +
  scale_fill_manual(values = c("black","bisque","black","bisque")) +
  stat_summary(fun.y="mean", geom="bar") +
  ggtitle("Favorability of Congress, 2004") +
  theme_bw()

ggplot(na.omit(mv04[, c("type", "lpres9")]), 
       aes(x=type, y=lpres9, fill = type)) +  
  geom_bar() +
  scale_fill_manual(values = c("black","bisque","black","bisque")) +
  stat_summary(fun.y="mean", geom="bar") +
  ggtitle("Favorability of President, 2004") +
  theme_bw()

## Categorizing the 4 types - poor/white, rich/white, poor/black, rich/black

mv04$type <- ifelse(mv04$white.check == 1, ifelse(mv04$poor == 1, "poor.white", ifelse(mv04$rich == 1, "rich.white", NA)), ## TRUE STATEMENT
                    ifelse(mv04$black.check == 1, ifelse(mv04$poor == 1, "poor.black", ifelse(mv04$rich == 1, "rich.black", NA)), NA))

table(mv04$type)

sum(69,58,435,219) ## Total respondents included 

## Two-way ANOVA of Gov Trust/police trust 

library(dplyr)

white.black.only <- filter(mv04, white.check == 1 | black.check == 1)
str(white.black.only)
str(white.black.only$white.black)

white.black.only$white.black <- ifelse(white.black.only$white.check == 1, "white", "black")
white.black.only$poor.check <- ifelse(white.black.only$dincome < 4, 1, 0)
white.black.only$rich.check <- ifelse(white.black.only$dincome > 6, 1, 0)
white.black.only$poor.rich <- ifelse(white.black.only$dincome < 4, "poor", ifelse(rich.check == 1, "rich", NA))

table(white.black.only$white.black)
table(white.black.only$poor.rich)

anova()

results2 = lm(police.trust ~ race.check + poor.rich + white.black*poor.rich, data= white.black.only)
anova(na.omit(results2))


## Mean racial rejction sensitivity 
whites.04 <- filter(mv04, white.check == 1) ## white respondents 
blacks.04 <- filter(mv04, black.check == 1)

t.test(whites.04$op.disc, blacks.04$op.disc)
cohen.d(whites.04$op.disc, blacks.04$op.disc, na.rm = TRUE)

sd(whites.04$op.disc, na.rm = TRUE)
sd(blacks.04$op.disc, na.rm = TRUE)

poor.whites.04 <- filter(whites.04, poor == 1)
rich.whites.04 <- filter(whites.04, rich == 1)

poor.blacks.04 <- filter(blacks.04, poor == 1)
rich.blacks.04 <- filter(blacks.04, rich == 1)

t.test(whites.04$mean.rrs, blacks.04$mean.rrs)
sd(whites.04$mean.rrs, na.rm = TRUE)
sd(blacks.04$mean.rrs, na.rm = TRUE)

poor.04 <- filter(mv04, dincome < 4) ## white respondents 
rich.04 <- filter(mv04, dincome > 6)
middle.class.04 <- filter(mv04, dincome >= 4 & dincome < 6)

middle.class.whites <- filter(middle.class.04, white.check == 1)
middle.class.blacks <- filter(middle.class.04, black.check == 1)
t.test(middle.class.whites$mean.rrs, middle.class.blacks$mean.rrs)
t.test(rich.blacks.04$mean.rrs, middle.class.blacks$mean.rrs)
t.test(poor.blacks.04$mean.rrs, rich.blacks.04$mean.rrs)
library(effsize)
cohen.d(poor.blacks.04$mean.rrs, rich.blacks.04$mean.rrs, na.rm = TRUE)

table(middle.class.04$race.check)
sd(poor.blacks.04$mean.rrs, na.rm = TRUE)
sd(rich.blacks.04$mean.rrs, na.rm = TRUE)

## Bonferroni Correction 
# 1. lgencor p-value 

p-value = 1.74e-11

# 2. lsupcor2 p-value

# Main questions 

## 1. lgencor1 
library(effsize)

t.test(poor.04$lgencor1, rich.04$lgencor1)
sd(poor.04$lgencor1)
sd(rich.04$lgencor1)
cohen.d(poor.04$lgencor1, rich.04$lgencor1)

val <- t.test(whites.04$lgencor1, blacks.04$lgencor1)
sd(whites.04$lgencor1)
sd(blacks.04$lgencor1)
cohen.d(whites.04$lgencor1, blacks.04$lgencor1)

t.test(poor.blacks.04$lgencor1, rich.blacks.04$lgencor1)
sd(poor.blacks.04$lgencor1)
sd(rich.blacks.04$lgencor1)
cohen.d(poor.blacks.04$lgencor1, rich.blacks.04$lgencor1)

summary(white.black.only$poor.rich)
summary(white.black.only$white.black)

results1 = lm(lgencor1 ~ white.black*poor.rich, data= white.black.only)
anova(results1)
aov1 <- aov(results1)
TukeyHSD(aov1)

anova(na.omit(results1), error.df = TRUE)
library(car)

Anova(na.omit(results1), error.df = TRUE)
summary(white.black.only$race.check)

lm1 <- lm(lgencor1 ~ mean.rrs + demskin + race.check*dincome + demage + demgend + dempolt + dedu.10, data = mv04)
coefplot(lm1, parm = -1)
coefplot(lm1, xlim = c(-1,1.5))
coefplot(lm2, add=TRUE, col.pts="firebrick",  intercept=TRUE, offset = 0.4)
coefplot(lm3, add=TRUE, col.pts="darkorange", intercept=TRUE, offset = 0.4)
coefplot(lm4, add=TRUE, col.pts="yellow",  intercept=TRUE, offset=0.1)
coefplot(lm7, add=TRUE, col.pts="green", intercept=TRUE, offset=1)
coefplot(lm8, add=TRUE, col.pts="blue",  intercept=TRUE, offset=1)
coefplot(lm9, add=TRUE, col.pts="purple", intercept=TRUE, offset=1)

t1 <- t.test(blacks.04$lgencor1, whites.04$lgencor1)
t2 <- t.test(blacks.04$lsupcor2, whites.04$lsupcor2)
t3 <- t.test(blacks.04$lpolice3, whites.04$lpolice3)
t4 <- t.test(blacks.04$llocpol4, whites.04$llocpol4)
t7 <- t.test(blacks.04$lcong7, whites.04$lcong7)
t8 <- t.test(blacks.04$llocgov8, whites.04$llocgov8)
t9 <- t.test(blacks.04$lpres9, whites.04$lpres9)

t.test(blacks.04$lcong7, whites.04$lcong7)

pvals <- c(t1$p.value, t2$p.value, t3$p.value, t4$p.value, t7$p.value, t8$p.value, t9$p.value)
pvals

library(stats)
p.adjust(pvals, method = "bonferroni", n = length(pvals))

summary(lm1)
library(lmtest)
bptest(lm1)
coeftest(lm1, vocv = hccm(lm1)) ## Robust Standard Errors Correction

## 2. lsupcor2

t.test(poor.04$lsupcor2, rich.04$lsupcor2)
sd(poor.04$lsupcor2, na.rm = TRUE)
sd(rich.04$lsupcor2, na.rm = TRUE)

cohen.d(poor.04$lsupcor2, rich.04$lsupcor2, na.rm = TRUE)

t.test(whites.04$lsupcor2, blacks.04$lsupcor2)
sd(whites.04$lsupcor2)
sd(blacks.04$lsupcor2, na.rm = TRUE)
cohen.d(whites.04$lsupcor2, blacks.04$lsupcor2, na.rm = TRUE)

t.test(poor.blacks.04$lsupcor2, rich.blacks.04$lsupcor2)
sd(poor.blacks.04$lsupcor2, na.rm = TRUE)
sd(rich.blacks.04$lsupcor2, na.rm = TRUE)
cohen.d(poor.blacks.04$lsupcor2, rich.blacks.04$lsupcor2, na.rm = TRUE)

t.test(poor.whites.04$lsupcor2, rich.whites.04$lsupcor2)

results2 = lm(lsupcor2 ~ white.black + poor.rich + white.black*poor.rich, data= white.black.only)
aov2 <- aov(results2)
aov2
TukeyHSD(aov2)

lm2 <- lm(lsupcor2 ~ mean.rrs + demskin + race.check*dincome + demage + demgend + dempolt + dedu.10, data = mv04)
summary(lm2)
bptest(lm2)
coeftest(lm2, vocv = hccm(lm2)) ## Robust Standard Errors Correction


## 3. lpolice3

t.test(poor.04$lpolice3, rich.04$lpolice3)
sd(poor.04$lpolice3, na.rm = TRUE)
sd(rich.04$lpolice3, na.rm = TRUE)
cohen.d(poor.04$lpolice3, rich.04$lpolice3, na.rm = TRUE)

t.test(whites.04$lpolice3, blacks.04$lpolice3)
sd(whites.04$lpolice3, na.rm = TRUE)
sd(blacks.04$lpolice3, na.rm = TRUE)
cohen.d(whites.04$lpolice3, blacks.04$lpolice3, na.rm = TRUE)

t.test(poor.blacks.04$lpolice3, rich.blacks.04$lpolice3)
sd(poor.blacks.04$lpolice3, na.rm = TRUE)
sd(rich.blacks.04$lpolice3, na.rm = TRUE)
cohen.d(poor.blacks.04$lpolice3, rich.blacks.04$lpolice3, na.rm = TRUE)

t.test(poor.whites.04$lpolice3, rich.whites.04$lpolice3)

results3 = lm(lpolice3 ~ white.black + poor.rich + white.black*poor.rich, data= white.black.only)
aov3 <- aov(results3)
TukeyHSD(aov3)
anova(na.omit(results3))

lm3 <- lm(lpolice3 ~ mean.rrs + demskin + race.check*dincome + demage + demgend + dempolt + dedu.10, data = mv04)
summary(lm3)
bptest(lm3)
coeftest(lm3, vocv = hccm(lm3)) ## Robust Standard Errors Correction

# 4. llocpol4 

t.test(poor.04$llocpol4, rich.04$llocpol4)
sd(poor.04$llocpol4, na.rm = TRUE)
sd(rich.04$llocpol4, na.rm = TRUE)
cohen.d(poor.04$llocpol4, rich.04$llocpol4, na.rm = TRUE)

t.test(whites.04$lcong7, blacks.04$llocpol4)
sd(whites.04$lcong7)
sd(blacks.04$lcong7, na.rm = TRUE)
cohen.d(whites.04$llocpol4, blacks.04$llocpol4, na.rm = TRUE)

t.test(poor.blacks.04$llocpol4, rich.blacks.04$llocpol4)
sd(poor.blacks.04$llocpol4, na.rm = TRUE)
sd(rich.blacks.04$llocpol4, na.rm = TRUE)
cohen.d(poor.blacks.04$llocpol4, rich.blacks.04$llocpol4, na.rm = TRUE)

t.test(poor.whites.04$llocpol4, rich.whites.04$llocpol4)

results4 = lm(llocpol4 ~ white.black + poor.rich + white.black*poor.rich, data= white.black.only)
aov4 <- aov(results4)
TukeyHSD(aov4)
anova(na.omit(results4))

lm4 <- lm(llocpol4 ~ mean.rrs + demskin + race.check*dincome + demage + demgend + dempolt + dedu.10, data = mv04)
summary(lm4)
bptest(lm4)
coeftest(lm4, vocv = hccm(lm4)) ## Robust Standard Errors Correction


# 5. lcong7

t.test(poor.04$lcong7, rich.04$lcong7)
sd(poor.04$lcong7, na.rm = TRUE)
sd(rich.04$lcong7, na.rm = TRUE)

cohen.d(poor.04$lcong7, rich.04$lcong7, na.rm = TRUE)

t.test(whites.04$lcong7, blacks.04$lcong7)
sd(whites.04$lcong7, na.rm = TRUE)
sd(blacks.04$lcong7, na.rm = TRUE)
cohen.d(whites.04$lcong7, blacks.04$lcong7, na.rm = TRUE)

t.test(poor.blacks.04$lcong7, rich.blacks.04$lcong7)
sd(poor.blacks.04$lcong7, na.rm = TRUE)
sd(rich.blacks.04$lcong7, na.rm = TRUE)
cohen.d(poor.blacks.04$lcong7, rich.blacks.04$lcong7, na.rm = TRUE)

t.test(poor.whites.04$lcong7, rich.whites.04$lcong7)

results7 = lm(lcong7 ~ white.black + poor.rich + white.black*poor.rich, data= white.black.only)
anova(results7)
aov7 <- aov(results7)
TukeyHSD(aov7)
anova(na.omit(results7))

lm7 <- lm(lcong7 ~ mean.rrs + demskin + race.check*dincome + demage + demgend + dempolt + dedu.10, data = mv04)
summary(lm7)
bptest(lm7)
coeftest(lm7, vocv = hccm(lm7)) ## Robust Standard Errors Correction

# 6. llocgov 8

t.test(poor.04$llocgov8, rich.04$llocgov8)
sd(poor.04$llocgov8, na.rm = TRUE)
sd(rich.04$llocgov8, na.rm = TRUE)
cohen.d(poor.04$llocgov8, rich.04$llocgov8, na.rm = TRUE)

t.test(whites.04$llocgov8, blacks.04$llocgov8)
sd(whites.04$llocgov8, na.rm = TRUE)
sd(blacks.04$llocgov8, na.rm = TRUE)

cohen.d(whites.04$llocgov8, blacks.04$llocgov8, na.rm = TRUE)

t.test(poor.blacks.04$llocgov8, rich.blacks.04$llocgov8)
sd(poor.blacks.04$llocgov8, na.rm = TRUE)
sd(rich.blacks.04$llocgov8, na.rm = TRUE)
cohen.d(poor.blacks.04$llocgov8, rich.blacks.04$llocgov8, na.rm = TRUE)

t.test(poor.whites.04$llocgov8, rich.whites.04$llocgov8)

results8 = lm(llocgov8 ~ white.black + poor.rich + white.black*poor.rich, data= white.black.only)
aov8 <- aov(results8)
TukeyHSD(aov8)
anova(na.omit(results8))

lm8 <- lm(llocgov8 ~ mean.rrs + demskin + race.check*dincome + demage + demgend + dempolt + dedu.10, data = mv04)
summary(lm8)
bptest(lm8)
coeftest(lm8, vocv = hccm(lm8)) ## Robust Standard Errors Correction

# 7. lpres9

t.test(poor.04$lpres9, rich.04$lpres9)
sd(poor.04$lpres9, na.rm = TRUE)
sd(rich.04$lpres9, na.rm = TRUE)
cohen.d(poor.04$lpres9, rich.04$lpres9, na.rm = TRUE)

t.test(whites.04$lpres9, blacks.04$lpres9)
sd(whites.04$lpres9, na.rm = TRUE)
sd(blacks.04$lpres9, na.rm = TRUE)
cohen.d(whites.04$lpres9, blacks.04$lpres9, na.rm = TRUE)

t.test(poor.blacks.04$lpres9, rich.blacks.04$lpres9)
sd(poor.blacks.04$lpres9, na.rm = TRUE)
sd(rich.blacks.04$lpres9, na.rm = TRUE)
cohen.d(poor.blacks.04$lpres9, rich.blacks.04$lpres9, na.rm = TRUE)

t.test(poor.blacks.04$mean.rrs, rich.blacks.04$mean.rrs)
sd(poor.blacks.04$mean.rrs, na.rm = TRUE)
sd(rich.blacks.04$mean.rrs, na.rm = TRUE)
cohen.d(poor.blacks.04$mean.rrs, rich.blacks.04$mean.rrs, na.rm = TRUE)

t.test(poor.whites.04$lpres9, rich.whites.04$lpres9)

results9 = lm(lpres9 ~ white.black + poor.rich + white.black*poor.rich, data= white.black.only)
aov9 <- aov(results9)
TukeyHSD(aov9)
anova(na.omit(results9))

lm9 <- lm(lpres9 ~ mean.rrs + demskin + race.check*dincome + demage + demgend + dempolt + dedu.10, data = mv04)
summary(lm9)
bptest(lm9)
coeftest(lm9, vocv = hccm(lm9)) ## Robust Standard Errors Correction

mv04$lgencor1 <- as.numeric(mv04$lgencor1)
mv04$lsupcor2 <- as.numeric(mv04$lsupcor2)
mv04$lpolice3 <- as.numeric(mv04$lpolice3)
mv04$llocpol4 <- as.numeric(mv04$llocpol4)
mv04$lcong7 <- as.numeric(mv04$lcong7)
mv04$llocgov8 <- as.numeric(mv04$llocgov8)
mv04$lpres9 <- as.numeric(mv04$lpres9)


## Data Visualization
## Visualizing Relationships using Boxplots

par(mfrow=c(1,2)) ## Two graphs at once?

library(lattice)

bwplot(type~mean.govtrust,data=mv04,
       xlab="Mean Perceived Favorability of American legal system",
       main="Views of Legal System Given Race/Class, 2004")

bwplot(type~police.trust,data=mv04,
       xlab="Mean Perceived Favorability of Police",
       main="Favorable Views of Police Given Race/Class, 2004")

## Visualizing using bar graphs

library(stringi)
library(ggplot2)

ggplot(na.omit(mv04[, c("type", "lgencor1")]), aes(x=type, y=lgencor1)) + geom_bar(stat="identity") + ggtitle("Favorability of Courts, 2004")

ggplot(na.omit(mv04[, c("type", "mean.govtrust")]), aes(x=type, y=mean.govtrust)) + geom_bar(stat="identity") + ggtitle("Favorability of Legal System, 2004")


## T-test of differences between poor/rich white/blacks 

## Assign groups 

t.test(poor.blacks.04$mean.justlaw, rich.blacks.04$mean.justlaw)
t.test(poor.blacks.04$lpres9, rich.blacks.04$lpres9)
t.test(poor.blacks.04$police.trust, rich.blacks.04$police.trust)
t.test(poor.blacks.04$police.trust, rich.blacks.04$police.trust)
t.test(poor.blacks.04$llocgov8, rich.blacks.04$llocgov8)

t.test(poor.whites.04$mean.justlaw, rich.whites.04$mean.justlaw)
t.test(poor.whites.04$police.trust, rich.whites.04$police.trust)

## White vs. Black

t.test(whites.04$mean.govtrust, blacks.04$mean.govtrust)
t.test(whites.04$police.trust, blacks.04$police.trust)

t.test(whites.04$mean.rrs, blacks.04$mean.rrs)

table(mv14$edcourtesy)

## Poor vs. Rich 

t.test(poor.04$mean.govtrust, rich.04$mean.govtrust)
t.test(poor.04$police.trust, rich.04$police.trust)
t.test(poor.04$opeq, rich.04$opeq)

table(mv04$ophouse)
table(mv04$oppolice)

mv04$ophouse <- 5 - as.numeric(mv04$ophouse)
mv04$oppolice <- 6 - as.numeric(mv04$oppolice)

mv04$op.disc <- rowMeans(mv04[,opp.disc])


## Poor whites vs. rich whites 

t.test(poor.whites.04$mean.govtrust, rich.whites.04$mean.govtrust)

t.test(poor.whites.04$police.trust, rich.whites.04$police.trust)

t.test(poor.whites.04$opeq, rich.whites.04$opeq)

## Poor blacks vs. rich blacks 

t.test(poor.blacks.04$mean.govtrust, rich.blacks.04$mean.govtrust)
t.test(poor.blacks.14$mean.govtrust, rich.blacks.14$mean.govtrust)

t.test(poor.blacks.04$police.trust, rich.blacks.04$police.trust)

t.test(poor.blacks.04$opeq, rich.blacks.04$opeq)

t.test(poor.blacks.04$mean.rrs, rich.blacks.04$mean.rrs)

## Diversity/Work/Politics 

library(QMSS)

diverse.exp <- c("dwpsoc", "dwpwork", "dwpdiv")
mv04$diverse <- rowMeans(mv04[,diverse.exp], na.rm = TRUE)

mv04$mean.exp <- rowMeans(mv04[,diverse.exp]) ## Mean exposure to people of other races

## Opportunity 

opportunity <- c("opeq", "opdscem", "opdscac", "ophouse", "opshop", "oppolice")
mv04$mean.OIS <- rowMeans(mv04[,opportunity], na.rm = TRUE)
mv14$mean.OIS <- rowMeans(mv14[,opportunity], na.rm = TRUE)

opportunity14 <- c("opeq","ophouse", "oppolice")
mv14$mean.OIS <- rowMeans(mv14[,opportunity14], na.rm = TRUE)



install.packages("psy")
library(psy)

scree.plot(outcomes_fa$correlations)

fa.parallel(outcomes)

outcomes_fa <- factanal(covmat = cor(outcomes, use = "na.or.complete"), 
                    factors = 5, rotation = "varimax")
outcomes_fa

fa.plot(outcomes_fa)


result <- PCA(outcomes)

outcomes_fa

Cronbach Alpha - 
  inherent coherence of the scale (glorified weighted correlation, 
    want to see internal consistency at least around 0.7/0.75)

install.packages("coefficientalpha")
library(coefficientalpha)

police_data 

alpha(outcomes)

fit <- factor.pa(na.omit(outcomes), nfactors=2)
fit 

fit <- factanal(na.omit(outcomes), 5, rotation="none")
print(fit, sort=TRUE)

biplot(mv.pca)

library(leaps)

library(FactoMineR)

result <- PCA(outcomes)


library(nFactors)
ev <- eigen(cor(na.omit(outcomes))) # get eigenvalues
ap <- parallel(subject=nrow(na.omit(outcomes)),var=ncol(na.omit(outcomes)),
               rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

## Produce a scree plot of component
fit <- princomp(na.omit(mv04[,7:15]), cor=TRUE)
summary(fit) # print variance accounted for 
loadings(fit) # pc loadings 
plot(fit,type="lines")

install.packages('nFactors')
library(nFactors)
ev <- eigen(cor(na.omit(mv04[,7:10]))) # get eigenvalues
ap <- parallel(subject=nrow(mv04),var=ncol(mv04),rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

## Predicting government/police trust using regression

mv04$demclass <- as.numeric(mv04$demclass)
table(mv04$demclass)

library(lmtest)



library(lars)
X <- model.matrix(lm1)[,-1]
y <- mv04$police.trust
Lasso <- lars(X, y, type = "lasso")
Lasso

num_size <- floor(0.50 * nrow(mv04))

set.seed(123)
train_ind <- sample(seq_len(nrow(mv04)), size = num_size)
train <- mv04[train_ind,]

library(pls)
PLS <- plsr(police.trust ~ opeq + oppolexp + demskin + white.check + black.check + demage + demgend + dempolt + dedu.10 + dincome,
            data = mv04, subset = train, validation = "LOO")


PCR <- pcr(police.trust ~ opeq + oppolexp + demskin + white.check + black.check + demage + demgend + dempolt + dedu.10 + dincome, data = mv04, subset = train, validation = "LOO")


null <- lm(police.trust ~ 1, data = mv04)
full <- lm(police.trust ~ lctafma + lpolafa + opeq + opdscem + opdscac + ophouse + oppolexp + dwppol + dwprace + dwppoldv + dwprace + dwpimprs + dwpmultb + mean.rrs + white.check + black.check + demage + demgend + dedu.10 + demclass + dempolt + dincome, data = mv04)
summary(full)

bptest(full)

str(mv04$opshop)
str(mv04$oppolice)

lmtrain <- lm(police.trust ~ dincome + log(mean.rrs) + black.check + white.check + oppolexp + opeq + demage + lpolafa + dwppol, data = mv04)
install.packages('rms')
library(rms)
lmrobcov <- robcov(lmtrain)
ols.robcov <- ols(police.trust ~ mean.rrs, data = mv04, x = T, y = T)



> robcov(ols.natcrimeM)
summary(lmtrain)
bptest(lmtrain)


mv04$demage <- as.numeric(mv04$demage)
mv04$dedu.10 <- as.numeric(mv04$dedu.10)
mv04$dmomedu <- as.numeric(mv04$dmomedu)
mv04$ddadedu <- as.numeric(mv04$ddadedu)
mv04$demgend <- as.factor(mv04$demgend)
mv04$demmarg <- as.factor(mv04$demmarg)
mv04$dempolt <- as.factor(mv04$dempolt)
mv04$dlibcnsv <- as.factor(mv04$dlibcnsv) 
mv04$demskin <- as.numeric(mv04$demskin)
mv04$dnumhse <- as.numeric(mv04$dnumhse)
mv04$dincome <- as.numeric(mv04$dincome)
mv04$demclass <- 6 - as.numeric(mv04$demclass)
mv04$white.check
mv04$black.check
mv04$mean.rrs

table(mv04$demclass)

demographics <- c("demage", "dedu.10", "dmomedu", "ddadedu", "demgend", "demmarg", "dempolt", "dlibcnsv", "demskin", "dnumhse", "dincome", "demclass", "white.check", "black.check", "mean.rrs")

step(model, data=model, direction="backward")

response <- c("police.trust")
predictors <- c("lctafma", "lpolafa", "opeq", "opdscem", "opdscac", "ophouse", "oppolexp", "dwppol", "dwprace", "dwppoldv", "dwprace", "dwpimprs", "dwpmultb", "mean.rrs", "white.check", "black.check", "demage", "demgend", "dedu.10", "demclass", "dempolt", "dincome")
no.na.data <- na.omit(mv04[c(predictors, response)])
model <- lm(formula=as.formula(paste(paste(response,'~', sep=''),
                                     paste(predictors,collapse='+'), sep='')), no.na.data)
step(model)

str(mv04$police.trust)

mv04$police.trust <- rowMeans(mv04[,police], na.rm = TRUE)
response <- c("police.trust")
demographics <- c("white.check", "black.check", "mean.rrs", "demage", "demgend", "dedu.10", "demclass", "dempolt", "dlibcnsv", "dincome")
no.na.data <- na.omit(mv04[c(demographics, response)])
model <- lm(formula=as.formula(paste(paste(response,'~', sep=''),
                                     paste(demographics,collapse='+'), sep='')), no.na.data)
step(model)

table(mv04$dlibcnsv)
